#analysis from last time tadpoles <- read.table("ecol 563/tadpoles.csv", header=T, sep=',') tadpoles$fac3 <- factor(tadpoles$fac3) out1 <- lm(response~fac1 + fac2 + fac3 + fac1:fac2 + fac1:fac3 + fac2:fac3 + fac1:fac2:fac3, data=tadpoles) anova(out1) library(car) Anova(out1) # list levels of a factor levels(tadpoles$fac1) #create an experiment where the treatments are balanced expand.grid(fac1=levels(tadpoles$fac1), fac2=levels(tadpoles$fac2), fac3=levels(tadpoles$fac3)) junk <- expand.grid(fac1=levels(tadpoles$fac1), fac2=levels(tadpoles$fac2), fac3=levels(tadpoles$fac3)) junk2 <- data.frame(fac1=rep(junk$fac1,10), fac2=rep(junk$fac2,10), fac3=rep(junk$fac3,10), y=rnorm(120)) junk2[1:8,] #fit model to balanced data mj <- lm(y~fac1*fac2*fac3, data=junk2) #Type I and Type II tests are the same anova(mj) Anova(mj) #data are unbalanced even with missing response values table(tadpoles$treatment) #get treatment counts after removing observations with missing response values table(tadpoles$treatment[!is.na(tadpoles$response)]) table(tadpoles$fac1[!is.na(tadpoles$response)]) table(tadpoles$fac2[!is.na(tadpoles$response)]) table(tadpoles$fac3[!is.na(tadpoles$response)]) # we dropped a 3-factor interaction and a 2-factor interaction Anova(out1) out2 <- lm(response~fac1 + fac2 + fac3 + fac1:fac2 + fac2:fac3, data=tadpoles) anova(out2) Anova(out2) #variance heterogeneity boxplot(response~treatment, data=tadpoles, horizontal=T, xlab='Mitotic activity', las=1) treat.mean <- tapply(tadpoles$response, tadpoles$treatment, mean, na.rm=T) treat.mean points(treat.mean, 1:12, col=2, pch=8) #calculate variances for each treatment treat.var <- tapply(tadpoles$response, tadpoles$treatment, var, na.rm=T) treat.var #display variances in dot plot library(lattice) dotplot(names(treat.var)~treat.var, xlab='Variance') #residuals look heavy-tailed relative to a normal dist qqPlot(residuals(out2)) #tests for variance homogeneity bartlett.test(tadpoles$response~tadpoles$treatment) fligner.test(tadpoles$response~tadpoles$treatment) leveneTest(response~treatment, data=tadpoles) leveneTest(response~treatment, data=tadpoles, center=mean) #use weighted least squares library(nlme) out.g <- gls(response~fac1+fac2+fac3+fac1:fac2+fac2:fac3, data=tadpoles, na.action=na.omit, method='ML') summary(out.g) anova(out.g) #let each treatment have its own variance out.g1 <- gls(response~fac1+fac2+fac3+fac1:fac2+fac2:fac3, data=tadpoles, na.action=na.omit, method='ML', weights=varIdent(form=~1|treatment)) anova(out.g1) #Anova doesn't work on gls object Anova(out.g1) #anova has a special method for gls objects: type argument anova(out.g1, type='sequential') anova(out.g1, type='marginal') summary(out.g1) #estimates have not changed much coef(out.g1) coef(out.g) #compare residuals qqPlot(residuals(out.g)) qqPlot(residuals(out.g1, type='normalized') #simulate from final model simulate(out2,n=1) #six simulated data sets junk <- simulate(out2,n=6) #generates a data frame class(junk) dim(junk) #observations with missing values for response area excluded dim(tadpoles) length(tadpoles$response[!is.na(tadpoles$response)]) #using the rep function rep(1:3,4) rep(1:3,c(4,4,4)) #repeat the number of simulated values rep(nrow(junk),6) my.dat1 <- data.frame(sims=unlist(junk),simnum = rep(1:6,rep(nrow(junk),6))) my.dat1[1:8,] # display kernel density estimates of distribution of simulated data densityplot(~sims|simnum, data=my.dat1) densityplot(~sims|factor(simnum), data=my.dat1, plot.points=F) # add kernel density of the raw data densityplot(~sims|factor(simnum), data=my.dat1, panel=function(x) { panel.densityplot(x, plot.points=F) panel.densityplot(tadpoles$response[!is.na(tadpoles$response)], plot.points=F, col=2) }) # remove missing response observations from treatment and response variable !is.na(tadpoles$response) short.treatment <- tadpoles$treatment[!is.na(tadpoles$response)] short.response <- tadpoles$response[!is.na(tadpoles$response)] #calculate maximum response in CoD1 treatment short.response[short.treatment=='CoD1'] CoD1.max <- max(short.response[short.treatment=='CoD1']) CoD1.max #obtain 1000 simulated data sets junk<-simulate(out2,n=1000) colnames(junk)[1:10] #we can access the elements using $, [,], or [[]] notation junk$sim_1 junk[,1] junk[[1]] #maximum response for coD1 treatment in simulation 1 max(junk[[1]][short.treatment=='CoD1']) #obtain maximum for CoD1 treatment in each simulation sim.max <- sapply(1:1000, function(x) max(junk[[x]][short.treatment=='CoD1'])) #the maximum simulated maxium is less than the actual maxiumum max(sim.max) CoD1.max #graph the density plot(density(sim.max)) #expand x-limits to make room for actual maximum c(min(sim.max), max(c(sim.max,CoD1.max))) plot(density(sim.max), xlim=c(min(sim.max), max(c(sim.max,CoD1.max)))) #add actual maximum points(CoD1.max, 0, col=2, pch=8)